biobakery_function <- ("~/Documents/GitHub/DivComAnalyses/R/phyloseq_biobakery_functions.R")
alpha <- ("~/Documents/GitHub/DivComAnalyses/R/phyloseq_alpha.R")
beta <- ("~/Documents/GitHub/DivComAnalyses/R/phyloseq_beta.R")
taxa <- ("~/Documents/GitHub/DivComAnalyses/R/phyloseq_taxa_tests.R")
norm <- ("~/Documents/GitHub/DivComAnalyses/R/phyloseq_normalisation.R")
heat <- ("~/Documents/GitHub/DivComAnalyses/R/phyloseq_heatmap.R")
varia <- ("~/Documents/GitHub/DivComAnalyses/R/phyloseq_varia.R")
sapply(c(biobakery_function, alpha, beta, taxa, norm, heat, varia), source, chdir = TRUE)
input <- list(metpahlan_ps = "~/Documents/GitHub/oral-microbiome/data/processed/metaphlan_ps.RDS",
motus_ps = "~/Documents/GitHub/oral-microbiome/data/processed/motus/physeq.RDS",
pathway_ps = "~/Documents/GitHub/oral-microbiome/data/processed/humann_pw_ps.RDS",
kegg_ps = "~/Documents/GitHub/oral-microbiome/data/processed/humann_kegg_ps.RDS",
l4c_ps = "~/Documents/GitHub/oral-microbiome/data/processed/humann_l4c_ps.RDS",
go_ps = "~/Documents/GitHub/oral-microbiome/data/processed/humann_go_ps.RDS",
infogo_ps = "~/Documents/GitHub/oral-microbiome/data/processed/humann_infogo_ps.RDS",
metac_ps = "~/Documents/GitHub/oral-microbiome/data/processed/humann_metac_ps.RDS",
pfam_ps = "~/Documents/GitHub/oral-microbiome/data/processed/humann_pfam_ps.RDS")
Data loading:
purrr::map(input,
readRDS) -> objects
Investigate most abundant taxa over sites:
sample_data(objects$metpahlan_ps$physeq)$Site_Health <- paste0(sample_data(objects$metpahlan_ps$physeq)$Oral_Site,
"_",
sample_data(objects$metpahlan_ps$physeq)$Health_status)
physeq_most_abundant(physeq = objects$metpahlan_ps$physeq,
group_var = "Site_Health",
ntax = 3) -> most_abundant
most_abundant
## [1] "Actinomyces_oris" "Corynebacterium_matruchotii"
## [3] "Neisseria_flavescens" "Rothia_dentocariosa"
## [5] "Rothia_mucilaginosa" "Streptococcus_parasanguinis"
## [7] "Streptococcus_salivarius" "Veillonella_atypica"
## [9] "Veillonella_parvula"
Heatmap those:
objects$metpahlan_ps$physeq %>%
transform_sample_counts(function(x) x/sum(x) * 100) %>%
phyloseq::subset_taxa(Species %in% most_abundant) %>%
phyloseq_ampvis_heatmap(transform = FALSE,
group_by = "Subject",#treatment
facet_by = c("Oral_Site", "Health_status", "Subject"),
ntax = 100,
tax_aggregate = "Species",
tax_add = NULL) -> p
## Warning: There are only 9 taxa, showing all
p + facet_grid(. ~ Oral_Site + Health_status, scales = "free", space = "free") +
scale_fill_viridis_c(breaks = c(0, 0.01, 1, 10, 25, 50, 75, 100),
labels = c(0, 0.01, 1, 10, 25,50, 75, 100),
trans = scales::pseudo_log_trans(sigma = 0.1)) +
theme_classic() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 6)) +
theme(axis.text.y = element_text(angle = 0, size = 8))
Test bacterial activity are compatible in Saliva vs local site only in health:
Saliva & Tongue
objects$metpahlan_ps$physeq %>%
subset_samples(Health_status == "Healthy control") %>%
subset_samples(Oral_Site %in% c("SALIVA", "TONGUE_BIOFILM")) -> tmp
tmp %>%
physeq_most_abundant(physeq = .,
group_var = "Oral_Site",
ntax = 5) -> ma_saliva_tongue
ma_saliva_tongue
## [1] "Actinomyces_sp_ICM47" "Neisseria_flavescens"
## [3] "Prevotella_melaninogenica" "Rothia_mucilaginosa"
## [5] "Streptococcus_parasanguinis" "Streptococcus_salivarius"
## [7] "Veillonella_atypica"
Heatmap those:
tmp %>%
transform_sample_counts(function(x) x/sum(x) * 100) %>%
phyloseq::subset_taxa(Species %in% ma_saliva_tongue) %>%
phyloseq_ampvis_heatmap(transform = FALSE,
group_by = "Subject",#treatment
facet_by = c("Oral_Site", "Health_status", "Subject"),
ntax = 100,
tax_aggregate = "Species",
tax_add = NULL) -> p
## Warning: There are only 7 taxa, showing all
p + facet_grid(. ~ Oral_Site + Health_status, scales = "free", space = "free") +
scale_fill_viridis_c(breaks = c(0, 0.01, 1, 10, 25, 50, 75, 100),
labels = c(0, 0.01, 1, 10, 25,50, 75, 100),
trans = scales::pseudo_log_trans(sigma = 0.1)) +
theme_classic() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 6)) +
theme(axis.text.y = element_text(angle = 0, size = 8))
Looks good. We want to check their activities among those two sites.
objects$pathway_ps$physeq %>%
humann2_species_contribution(meta_data_var = c("Subject", "sample_Sample" ,"Type", "Oral_Site", "Health_status")) %>%
separate(Feature, into = c("code", "name"), sep = ": ", remove = FALSE) %>%
dplyr::filter(!is.na(name)) -> pwy
## Warning in speedyseq::psmelt(.): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 720 rows [1029,
## 1903, 1961, 2338, 2461, 2854, 3562, 3891, 4578, 5649, 5690, 5849, 5854, 6607,
## 6843, 7028, 7077, 7309, 7819, 8576, ...].
pwy %>%
distinct(name, .keep_all = TRUE) %>%
DT::datatable()
pwy %>%
dplyr::filter(Health_status == "Healthy control",
Oral_Site %in% c("SALIVA", "TONGUE_BIOFILM")) %>%
humann2_RNA_DNA_plot(x_plot = "log10(DNA)",
y_plot = "log10(RNA)",
color = "Species",
fill = "Species",
shape = "Genus",
group = "Species",
filter_species = ma_saliva_tongue,
facet_formula = "Oral_Site ~ .") -> plot
## Warning in if (filter_species != FALSE) {: the condition has length > 1 and only
## the first element will be used
plot$legend
plot$plot +
scale_fill_viridis_d() +
scale_color_viridis_d()
# plot$plot %>%
# plotly::ggplotly()
Boxplot of each speices at each site based on overall pathway.
pwy %>%
dplyr::filter(Health_status == "Healthy control",
Oral_Site %in% c("SALIVA", "TONGUE_BIOFILM")) %>%
# filter(RNA_DNA > mean(RNA_DNA, na.rm = TRUE)) %>%
# dplyr::filter(grepl("ose",name)) %>%
humann2_RNA_DNA_ratio_plot(x_plot = "Species",
y_plot = "log2(RNA_DNA)",
color = "Oral_Site",
fill = "Oral_Site",
shape = NULL,
filter_species = ma_saliva_tongue,
facet_formula = ". ~ .",
export_legend = TRUE) -> plot
## Warning in if (filter_species != FALSE) {: the condition has length > 1 and only
## the first element will be used
plot$legend
plot$plot + ggpubr::rotate()
plot$plot$data %>%
ggpubr::compare_means(RNA_DNA ~ Oral_Site,
group.by = c("Species"),
data = .,
method = "wilcox.test",
p.adjust.method = "fdr") %>%
filter(p.adj < 0.05) %>%
DT::datatable()
From here we can focus at Strep. parasanguinis and check the pathways expressed by patients in diff/ sites:
pwy %>%
dplyr::filter(Health_status == "Healthy control",
Oral_Site %in% c("SALIVA", "TONGUE_BIOFILM")) %>%
humann2_RNA_DNA_ratio_plot(x_plot = "name",
y_plot = "log2(RNA_DNA)",
color = "Oral_Site",
fill = "Oral_Site",
shape = NULL,
filter_species = "Streptococcus_parasanguinis",
facet_formula = " . ~ Health_status",
export_legend = TRUE) -> plot
plot$legend
plot$plot
Test for signicant differences and same figure
plot$plot$data %>%
mutate(log2RNA_DNA = log2(RNA_DNA)) %>%
ggpubr::compare_means(log2RNA_DNA ~ Oral_Site,
group.by = c("Feature"),
data = .,
# method = "wilcox.test",
p.adjust.method = "fdr") %>%
filter(p.adj < 0.05) -> diff_signif
diff_signif %>%
DT::datatable()
diff_signif$Feature
## [1] "PWY-1042: glycolysis IV"
## [2] "PWY-4041: γ-glutamyl cycle"
## [3] "PWY-6609: adenine and adenosine salvage III"
## [4] "ILEUSYN-PWY: L-isoleucine bios. I"
## [5] "VALSYN-PWY: L-valine bios."
## [6] "PWY-6386: UDP-N-acetylmuramoyl-pentapeptide bios. II"
## [7] "BRANCHED-CHAIN-AA-SYN-PWY: spw of branched amino acid bios."
## [8] "PWY-5103: L-isoleucine bios. III"
## [9] "PWY-3001: spw of L-isoleucine bios. I"
## [10] "THRESYN-PWY: spw of L-threonine bios."
## [11] "PWY-6387: UDP-N-acetylmuramoyl-pentapeptide bios. I"
## [12] "PWY-5686: UMP bios."
## [13] "TRNA-CHARGING-PWY: tRNA charging"
pwy %>%
dplyr::filter(Health_status == "Healthy control",
Oral_Site %in% c("SALIVA", "TONGUE_BIOFILM")) %>%
humann2_RNA_DNA_ratio_plot(x_plot = "name",
y_plot = "log2(RNA_DNA)",
color = "Oral_Site",
fill = "Oral_Site",
shape = NULL,
filter_feature = diff_signif$Feature,
filter_species = "Streptococcus_parasanguinis",
facet_formula = " . ~ Health_status",
export_legend = TRUE) -> plot
## Warning in if (filter_feature != FALSE) {: the condition has length > 1 and only
## the first element will be used
plot$plot
pwy %>%
# dplyr::filter(Health_status == "Healthy control",
dplyr::filter(Oral_Site %in% c("SALIVA", "TONGUE_BIOFILM")) %>%
humann2_RNA_DNA_ratio_plot(x_plot = "name",
y_plot = "log2(RNA_DNA)",
color = "Health_status",
fill = "Health_status",
shape = NULL,
filter_species = "Streptococcus_parasanguinis",
facet_formula = " . ~ Oral_Site ",
export_legend = TRUE) -> plot
plot$legend
plot$plot
plot$plot$data %>%
mutate(log2RNA_DNA = log2(RNA_DNA)) %>%
ggpubr::compare_means(log2RNA_DNA ~ Health_status,
group.by = c("Oral_Site","Feature"),
data = .,
# method = "wilcox.test",
p.adjust.method = "fdr") %>%
filter(p.adj < 0.05) -> diff_signif
diff_signif %>%
DT::datatable()
diff_signif$Feature
## [1] "PWY-1042: glycolysis IV"
## [2] "PWY-5100: pyruvate fermentation to acetate and lactate II"
## [3] "PWY-7219: adenosine ribonucleotides de novo bios."
## [4] "PWY-6121: 5-aminoimidazole ribonucleotide bios. I"
## [5] "PWY-6122: 5-aminoimidazole ribonucleotide bios. II"
## [6] "PWY-6277: spw of 5-aminoimidazole ribonucleotide bios."
## [7] "BRANCHED-CHAIN-AA-SYN-PWY: spw of branched amino acid bios."
## [8] "PWY-5103: L-isoleucine bios. III"
## [9] "PWY-6936: seleno-amino acid bios."
## [10] "PWY-3001: spw of L-isoleucine bios. I"
## [11] "PWY-6386: UDP-N-acetylmuramoyl-pentapeptide bios. II"
## [12] "TRNA-CHARGING-PWY: tRNA charging"
## [13] "THRESYN-PWY: spw of L-threonine bios."
## [14] "PWY-6387: UDP-N-acetylmuramoyl-pentapeptide bios. I"
## [15] "PWY-6123: inosine-5'-phosphate bios. I"
pwy %>%
# dplyr::filter(Health_status == "Healthy control",
# dplyr::filter(Oral_Site %in% c("SALIVA", "TONGUE_BIOFILM")) %>%
dplyr::filter(Oral_Site %in% c("TONGUE_BIOFILM")) %>%
humann2_RNA_DNA_ratio_plot(x_plot = "name",
y_plot = "log2(RNA_DNA)",
color = "Health_status",
fill = "Health_status",
shape = NULL,
filter_species = "Streptococcus_parasanguinis",
filter_feature = diff_signif %>% filter(Oral_Site == "TONGUE_BIOFILM") %>% pull(Feature),
facet_formula = " . ~ Oral_Site ",
export_legend = TRUE) -> plot
## Warning in if (filter_feature != FALSE) {: the condition has length > 1 and only
## the first element will be used
plot$legend
plot$plot
pwy %>%
# dplyr::filter(Health_status == "Healthy control",
# dplyr::filter(Oral_Site %in% c("SALIVA", "TONGUE_BIOFILM")) %>%
dplyr::filter(Oral_Site %in% c("SALIVA")) %>%
humann2_RNA_DNA_ratio_plot(x_plot = "name",
y_plot = "log2(RNA_DNA)",
color = "Health_status",
fill = "Health_status",
shape = NULL,
filter_species = "Streptococcus_parasanguinis",
filter_feature = diff_signif %>% filter(Oral_Site == "SALIVA") %>% pull(Feature),
facet_formula = " . ~ Oral_Site ",
export_legend = TRUE) -> plot
## Warning in if (filter_feature != FALSE) {: the condition has length > 1 and only
## the first element will be used
plot$legend
plot$plot
See from streptococci point of view:
objects$metpahlan_ps$physeq %>%
subset_samples(Health_status == "Healthy control") %>%
subset_samples(Oral_Site %in% c("SALIVA", "TONGUE_BIOFILM")) -> tmp
tmp %>%
physeq_most_abundant(physeq = .,
group_var = "Oral_Site",
ntax = 3,
tax_level = "Genus") -> ma_saliva_tongue_g
ma_saliva_tongue_g
## [1] "Neisseria" "Rothia" "Streptococcus" "Veillonella"
Heatmap those:
tmp %>%
transform_sample_counts(function(x) x/sum(x) * 100) %>%
phyloseq::subset_taxa(Genus %in% ma_saliva_tongue_g) %>%
phyloseq_ampvis_heatmap(transform = FALSE,
group_by = "Subject",#treatment
facet_by = c("Oral_Site", "Health_status", "Subject"),
ntax = 100,
tax_aggregate = "Genus",
tax_add = NULL) -> p
## Warning: There are only 4 taxa, showing all
p + facet_grid(. ~ Oral_Site + Health_status, scales = "free", space = "free") +
scale_fill_viridis_c(breaks = c(0, 0.01, 1, 10, 25, 50, 75, 100),
labels = c(0, 0.01, 1, 10, 25,50, 75, 100),
trans = scales::pseudo_log_trans(sigma = 0.1)) +
theme_classic() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 6)) +
theme(axis.text.y = element_text(angle = 0, size = 8))
strep. species?
objects$metpahlan_ps$physeq %>%
subset_samples(Health_status == "Healthy control") %>%
subset_samples(Oral_Site %in% c("SALIVA", "TONGUE_BIOFILM")) -> tmp
tmp %>%
subset_taxa(Genus %in% c("Streptococcus")) %>%
physeq_most_abundant(physeq = .,
group_var = "Oral_Site",
ntax = 5,
tax_level = "Species") -> ma_saliva_tongue_strep
ma_saliva_tongue_strep
## [1] "Streptococcus_australis" "Streptococcus_infantis"
## [3] "Streptococcus_mitis" "Streptococcus_parasanguinis"
## [5] "Streptococcus_salivarius" "Streptococcus_sanguinis"
## [7] "Streptococcus_sp_A12"
tmp %>%
transform_sample_counts(function(x) x/sum(x) * 100) %>%
phyloseq::subset_taxa(Species %in% ma_saliva_tongue_strep) %>%
phyloseq_ampvis_heatmap(transform = FALSE,
group_by = "Subject",#treatment
facet_by = c("Oral_Site", "Health_status", "Subject"),
ntax = 100,
tax_aggregate = "Species",
tax_add = NULL) -> p
## Warning: There are only 7 taxa, showing all
p + facet_grid(. ~ Oral_Site + Health_status, scales = "free", space = "free") +
scale_fill_viridis_c(breaks = c(0, 0.01, 1, 10, 25, 50, 75, 100),
labels = c(0, 0.01, 1, 10, 25,50, 75, 100),
trans = scales::pseudo_log_trans(sigma = 0.1)) +
theme_classic() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 6)) +
theme(axis.text.y = element_text(angle = 0, size = 8))
let’s focus on the three most abundant:
strep_sel <- c("Streptococcus_infantis", "Streptococcus_parasanguinis", "Streptococcus_salivarius")
pwy %>%
dplyr::filter(Health_status == "Healthy control",
Oral_Site %in% c("SALIVA", "TONGUE_BIOFILM")) %>%
# filter(RNA_DNA > mean(RNA_DNA, na.rm = TRUE)) %>%
# dplyr::filter(grepl("ose",name)) %>%
humann2_RNA_DNA_ratio_plot(x_plot = "Species",
y_plot = "log2(RNA_DNA)",
color = "Oral_Site",
fill = "Oral_Site",
shape = NULL,
filter_species = strep_sel,
facet_formula = ". ~ .",
export_legend = TRUE) -> plot
## Warning in if (filter_species != FALSE) {: the condition has length > 1 and only
## the first element will be used
plot$legend
plot$plot + ggpubr::rotate()
plot$plot$data %>%
ggpubr::compare_means(RNA_DNA ~ Oral_Site,
group.by = c("Species"),
data = .,
method = "wilcox.test",
p.adjust.method = "fdr") %>%
filter(p.adj < 0.05) %>%
DT::datatable()
pwy %>%
dplyr::filter(Health_status == "Healthy control",
Oral_Site %in% c("SALIVA", "TONGUE_BIOFILM")) %>%
humann2_RNA_DNA_ratio_plot(x_plot = "name",
y_plot = "log2(RNA_DNA)",
color = "Oral_Site",
fill = "Oral_Site",
shape = NULL,
filter_species = strep_sel,
facet_formula = " . ~ Health_status + Species",
export_legend = TRUE) -> plot
## Warning in if (filter_species != FALSE) {: the condition has length > 1 and only
## the first element will be used
plot$legend
plot$plot
We could pintpoint pathways of interests here. test for significant diff + health vs perio. stay at pathway level before going deeper.
plot$plot$data %>%
# mutate(log2RNA_DNA = log2(RNA_DNA)) %>%
ggpubr::compare_means(RNA_DNA ~ Oral_Site,
group.by = c("Species","Feature"),
data = .,
# method = "wilcox.test",
p.adjust.method = "fdr") %>%
filter(p.adj < 0.05) -> diff_signif
diff_signif %>%
DT::datatable()
pwy %>%
dplyr::filter(Health_status == "Healthy control") %>%
dplyr::filter(Oral_Site %in% c("SALIVA", "TONGUE_BIOFILM")) %>%
# dplyr::filter(Oral_Site %in% c("TONGUE_BIOFILM")) %>%
humann2_RNA_DNA_ratio_plot(x_plot = "name",
y_plot = "log2(RNA_DNA)",
color = "Oral_Site",
fill = "Oral_Site",
shape = NULL,
filter_species = "Streptococcus_infantis",
filter_feature = diff_signif %>% filter(Species == "Streptococcus_infantis") %>% pull(Feature),
facet_formula = " . ~ Health_status ",
export_legend = TRUE) -> plot
## Warning in if (filter_feature != FALSE) {: the condition has length > 1 and only
## the first element will be used
plot$legend
plot$plot
pwy %>%
dplyr::filter(Health_status == "Healthy control") %>%
dplyr::filter(Oral_Site %in% c("SALIVA", "TONGUE_BIOFILM")) %>%
# dplyr::filter(Oral_Site %in% c("TONGUE_BIOFILM")) %>%
humann2_RNA_DNA_ratio_plot(x_plot = "name",
y_plot = "log2(RNA_DNA)",
color = "Oral_Site",
fill = "Oral_Site",
shape = NULL,
filter_species = "Streptococcus_parasanguinis",
filter_feature = diff_signif %>% filter(Species == "Streptococcus_parasanguinis") %>% pull(Feature),
facet_formula = " . ~ Health_status ",
export_legend = TRUE) -> plot
## Warning in if (filter_feature != FALSE) {: the condition has length > 1 and only
## the first element will be used
plot$legend
plot$plot
Streptococcus_salivarius
pwy %>%
dplyr::filter(Health_status == "Healthy control") %>%
dplyr::filter(Oral_Site %in% c("SALIVA", "TONGUE_BIOFILM")) %>%
# dplyr::filter(Oral_Site %in% c("TONGUE_BIOFILM")) %>%
humann2_RNA_DNA_ratio_plot(x_plot = "name",
y_plot = "log2(RNA_DNA)",
color = "Oral_Site",
fill = "Oral_Site",
shape = NULL,
filter_species = "Streptococcus_salivarius",
filter_feature = diff_signif %>% filter(Species == "Streptococcus_salivarius") %>% pull(Feature),
facet_formula = " . ~ Health_status ",
export_legend = TRUE) -> plot
## Warning in if (filter_feature != FALSE) {: the condition has length > 1 and only
## the first element will be used
plot$legend
plot$plot